home *** CD-ROM | disk | FTP | other *** search
Text File | 1998-04-16 | 2.9 KB | 91 lines | [TEXT/CWIE] |
- // TWVector.h
- // Copyright: © 1994 - 1998 by Apple Computer, Inc., all rights reserved.
- #pragma once
- #ifndef TWVector_h
- #define TWVector_h
-
- #pragma import on
-
- #if PRAGMA_STRUCT_ALIGN
- #pragma options align=power
- #endif
-
- #include "TermIndex.h"
-
- #pragma IA_BEGIN_EXPORTS
-
- // TWComponent: one (term, weight) entry; a TWVector's components are an array of these.
- struct TWComponent {
- TermID termID; // the TermID of the term this entry describes (type is not declared in this header; presumably from TermIndex.h)
- float weight; // the normalized weight of the term
- private:
- void* operator new(size_t size); // stack or array allocate only: the scalar operator new is declared private, so a plain `new TWComponent` is rejected outside this struct
- };
-
- // TWVector: an array of TWComponents (term/weight pairs) plus its component count; available from a VectorAccessor.
- class TWVector : public IAObject {
- public:
- TWVector(DocLength l); // NOTE(review): l presumably becomes the initial component count; whether the components array is allocated here is not visible in this header - confirm
- ~TWVector(); // NOTE(review): whether this releases the components array is not visible here - confirm before handing in an array via SetComponents()
-
- float Similarity(TWVector* other); // similarity score between this vector and other; the metric is defined in the implementation (presumably a dot product over matching terms - confirm)
- void Normalize(); // presumably rescales the component weights in place - confirm the norm used
-
- TWVector* Sum(TWVector* other); // NOTE(review): presumably returns a new vector like the AddWeighted family below - confirm who owns the result
-
- void SetDocumentLength(DocLength l) {length = l;} // overwrites the stored component count only; the components array is not touched
- void SetComponents(TWComponent* c) {components = c;} // stores the pointer as-is: no copy is made and the previous array is not released by this call
- DocLength GetDocumentLength() const {return length;} // the stored component count
- TWComponent* GetComponents() const {return components;} // the stored array pointer itself, not a copy
-
- bool HasNegativeComponents() const; // presumably true when any component weight is below zero - confirm
- IABlockSize StoreSize() const; // presumably the size of the data Store() writes - confirm
- TWVector* DeepCopy() const; // returns a TWVector*; presumably a new vector with its own components array - confirm ownership
- void Store( IAOutputBlock *output ) const; // writes this vector to output; format is defined in the implementation
- TWVector* Restore( IAInputBlock *input ) const; // NOTE(review): a const instance method that returns a TWVector*, so it presumably builds a new vector from input rather than filling in this one - confirm
-
- // SortByTerm is the normal ordering for the vectors.
- void SortByTerm();
- // SortByWeight is a special ordering for the vectors, to be used
- // during human-output and truncation operations; it sorts by the size of the component weight.
- void SortByWeight();
- // SortByAbsoluteWeight is a special ordering for the vectors, to be used during human-output
- // and truncation operations; it sorts by the absolute value of the component weight.
- void SortByAbsoluteWeight();
- // Truncate removes the terms with the lowest weights until maxTerms or fewer are left.
- void Truncate(uint32 maxTerms );
- // TruncateByAbsoluteValue removes the terms with the lowest absolute weights until maxTerms or fewer are left.
- void TruncateByAbsoluteValue(uint32 maxTerms );
-
- // This is a cluster centroid representing totalVectorCount-1 other vectors.
- // The parameter is a new vector to be incorporated into this (the centroid).
- // The new centroid is returned. NOTE(review): invertSecondVector is undocumented; presumably it negates newVector before averaging - confirm.
- // The formula: result = ((n-1)/n)*this + (1/n)*newVector, where n = totalVectorCount.
- TWVector* AddIntoAverage(const TWVector *newVector,
- uint32 totalVectorCount, bool invertSecondVector = false);
- // Returns a NEW TWVector; does not allow negative components in the result. Presumably weightFactor1 scales this and weightFactor2 scales vector - confirm.
- TWVector* AddWeighted(const TWVector *vector,
- float weightFactor1, float weightFactor2);
- // Returns a NEW TWVector; unlike AddWeighted, negative components ARE allowed in the result.
- TWVector* AddWeightedAllowNegatives(const TWVector *vector,
- float weightFactor1, float weightFactor2);
-
- // bool Equal(TWVector* other);
-
- private:
- TWVector(TWVector&); // copy constructor declared private so code outside the class cannot copy-construct a TWVector
- DocLength length; // the number of components in the vector
- TWComponent* components; // an array of TWComponents
-
- };
-
- #pragma IA_END_EXPORTS
-
- #if PRAGMA_STRUCT_ALIGN
- #pragma options align=reset
- #endif
-
- #pragma import reset
-
- #endif
-